Story 6#
Andrew Bowen DATA 621 CUNY Data Science Masters
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import plotly.express as px
from urllib.request import urlopen
import plotly.figure_factory as ff
import shapely
import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)
Data Sources#
I found US Food Security data published by the US Census Bureau here
# Original read-in, posted on GitHub for quicker read-in
# dat = pd.read_csv("https://www2.census.gov/programs-surveys/cps/datasets/2022/supp/dec22pub.csv")
dat = pd.read_parquet("https://github.com/andrewbowen19/storiesDATA608/blob/main/data/census-food-security.parquet?raw=true")
dat.head()
| HRHHID | HRMONTH | HRYEAR4 | HURESPLI | HUFINAL | FILLER | HETENURE | HEHOUSUT | HETELHHD | HETELAVL | ... | HRFS30D1 | HRFS30D2 | HRFS30D3 | HRFS30D4 | HRFS30D5 | HRFS30D6 | HRFS30D7 | HRFS30D8 | HRFS30D9 | HRFS30DE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 351819007700950 | 12 | 2022 | -1 | 225 | NaN | -1 | 1 | -1 | -1 | ... | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 |
| 1 | 143200010045972 | 12 | 2022 | -1 | 226 | NaN | -1 | 1 | -1 | -1 | ... | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 |
| 2 | 201348501120003 | 12 | 2022 | -1 | 225 | NaN | 1 | 5 | -1 | -1 | ... | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 |
| 3 | 610009070389621 | 12 | 2022 | -1 | 226 | NaN | -1 | 1 | -1 | -1 | ... | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 |
| 4 | 110862907936 | 12 | 2022 | -1 | 226 | NaN | -1 | 1 | -1 | -1 | ... | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 |
5 rows × 507 columns
dat.to_parquet("data/census-food-security.parquet")
Data Wrangling#
We’ll need to do a significant amount of column renaming. Using this data dictionary
Children’s Food Security Scale variables are coded as “Not in Universe” (-1) if there were no children in the household.
# Rename dataframe columns
column_renames = {
"HRHHID": "household_id",
"PESEX": "sex",
"HRFS12M1": "food_security_level",
"HRFS12M6": "raw_child_food_security_score",
"HRFS12MC": "child_food_security_level",
"HRFS12M8": "adult_food_security_level"
}
df = dat.rename(columns=column_renames)
df = df[list(column_renames.values())]
df.head()
| household_id | sex | food_security_level | raw_child_food_security_score | child_food_security_level | adult_food_security_level | |
|---|---|---|---|---|---|---|
| 0 | 351819007700950 | -1 | -1 | -1 | -1 | -1 |
| 1 | 143200010045972 | -1 | -1 | -1 | -1 | -1 |
| 2 | 201348501120003 | -1 | -1 | -1 | -1 | -1 |
| 3 | 610009070389621 | -1 | -1 | -1 | -1 | -1 |
| 4 | 110862907936 | -1 | -1 | -1 | -1 | -1 |
df.child_food_security_level.unique()
array([-1, 1, 2, 3, -9])
child_level_map = {3: "Food Secure", 2: "Low Food Security", 1: "Very Low Food Security", -9: "Not in Universe", -1: "No Response"}
adult_level_map = {4: "High", 3: "Marginal", 2: "Low", 1: "Very Low", -9: "Not in Universe", -1: "No Response"}
# Map values
df['child_food_security_level'] = df.child_food_security_level.map(child_level_map)
df['adult_food_security_level'] = df.adult_food_security_level.map(adult_level_map)
Data Visualization#
child_food_security = df.loc[(df.child_food_security_level!="No Response") &
(df.child_food_security_level!="Not in Universe")].groupby('child_food_security_level')['child_food_security_level'].count()
child_food_security
child_food_security_level
Food Secure 335
Low Food Security 2503
Very Low Food Security 30527
Name: child_food_security_level, dtype: int64
# Plot child food security levels
f, ax = plt.subplots(figsize=(12,10))
ax.bar(child_food_security.index, child_food_security.values)
ax.set_xlabel("Food Security Level - USDA")
ax.set_ylabel("Number of Children")
ax.set_title("The majority of survey respondents with children did not meet the USDA's definition of food security")
f.suptitle("Childhood Food Security - USDA 2022", fontsize=18)
Text(0.5, 0.98, 'Childhood Food Security - USDA 2022')
adult_food_security = df.loc[(df.adult_food_security_level!="No Response") &
(df.adult_food_security_level!="Not in Universe")].groupby('adult_food_security_level')['adult_food_security_level'].count()
adult_food_security
adult_food_security_level
High 3474
Low 6845
Marginal 5217
Very Low 59280
Name: adult_food_security_level, dtype: int64
# Plot adult food security levels
f, ax = plt.subplots(figsize=(12,10))
ax.bar(adult_food_security.index, adult_food_security.values)
ax.set_xlabel("Food Security Level - USDA")
ax.set_ylabel("Number of Adults")
ax.set_title("The majority of adult survey respondents did not meet the USDA's definition of food security")
f.suptitle("Adult Food Security - USDA 2022", fontsize=18)
Text(0.5, 0.98, 'Adult Food Security - USDA 2022')
The excel file included is posted on the US Department of Agriculture website here. This comes from their Food Atlas initiative to give an overview of food access within the United States. I’ve converted the file to parquet for easier read-in times.
# Data Dictionary for USDA Food Atlas data
# dd = pd.read_excel("/Users/andrewbowen/Downloads/FoodAccessResearchAtlasData2019.xlsx", sheet_name="Variable Lookup")
# dd.to_csv("data/food-atlass-dd.csv", index=False)
dd = pd.read_csv("https://raw.githubusercontent.com/andrewbowen19/storiesDATA608/main/data/food-atlass-dd.csv")
# Initial read in, converting to parquet format for easier storage
# Uncomment the block below and move the excel file linked into your local data folder
# food_locs = pd.read_excel("/Users/andrewbowen/Downloads/FoodAccessResearchAtlasData2019.xlsx", sheet_name="Food Access Research Atlas")
# food_locs.to_parquet("./data/food_access.parquet", index=False)
# food_locs.head()
I uploaded the dataset above as parquet files to my GitHub here
# Read in food location data
food_data_url = "https://github.com/andrewbowen19/storiesDATA608/blob/main/data/food_access.parquet?raw=true"
food_locs = pd.read_parquet(food_data_url, engine="auto")
# Cleaning up food location data
# Prepend Census tracts with digits if needed ot match Census Labels
food_locs['CensusTract'] = food_locs['CensusTract'].astype(str).str.zfill(11)
food_locs['FIPS'] = food_locs['CensusTract'].str.slice(0, 5)
food_locs.head()
| CensusTract | State | County | Urban | Pop2010 | OHU2010 | GroupQuartersFlag | NUMGQTRS | PCTGQTRS | LILATracts_1And10 | ... | TractBlack | TractAsian | TractNHOPI | TractAIAN | TractOMultir | TractHispanic | TractHUNV | TractSNAP | FIPS | county | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01001020100 | Alabama | Autauga County | 1 | 1912 | 693 | 0 | 0.0 | 0.000000 | 0 | ... | 217.0 | 14.0 | 0.0 | 14.0 | 45.0 | 44.0 | 6.0 | 102.0 | 01001 | Autauga |
| 1 | 01001020200 | Alabama | Autauga County | 1 | 2170 | 743 | 0 | 181.0 | 8.341014 | 1 | ... | 1217.0 | 5.0 | 0.0 | 5.0 | 55.0 | 75.0 | 89.0 | 156.0 | 01001 | Autauga |
| 2 | 01001020300 | Alabama | Autauga County | 1 | 3373 | 1256 | 0 | 0.0 | 0.000000 | 0 | ... | 647.0 | 17.0 | 5.0 | 11.0 | 117.0 | 87.0 | 99.0 | 172.0 | 01001 | Autauga |
| 3 | 01001020400 | Alabama | Autauga County | 1 | 4386 | 1722 | 0 | 0.0 | 0.000000 | 0 | ... | 193.0 | 18.0 | 4.0 | 11.0 | 74.0 | 85.0 | 21.0 | 98.0 | 01001 | Autauga |
| 4 | 01001020500 | Alabama | Autauga County | 1 | 10766 | 4082 | 0 | 181.0 | 1.681219 | 0 | ... | 1437.0 | 296.0 | 9.0 | 48.0 | 310.0 | 355.0 | 230.0 | 339.0 | 01001 | Autauga |
5 rows × 149 columns
food_locs['county'] = food_locs['County'].str.replace(" County", "")
df = pd.DataFrame(food_locs.groupby(["county", "FIPS"]).mean()).reset_index()
df = df.fillna(0)
df.head()
| county | FIPS | Urban | Pop2010 | OHU2010 | GroupQuartersFlag | NUMGQTRS | PCTGQTRS | LILATracts_1And10 | LILATracts_halfAnd10 | ... | TractSeniors | TractWhite | TractBlack | TractAsian | TractNHOPI | TractAIAN | TractOMultir | TractHispanic | TractHUNV | TractSNAP | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Abbeville | 45001 | 0.166667 | 4236.166667 | 1665.000000 | 0.000000 | 150.166667 | 3.392506 | 0.333333 | 0.333333 | ... | 700.500000 | 2948.500000 | 1197.833333 | 12.500000 | 0.833333 | 10.000000 | 66.500000 | 42.500000 | 169.000000 | 325.833333 |
| 1 | Acadia Parish | 22001 | 0.500000 | 5147.750000 | 1903.416667 | 0.000000 | 87.500000 | 1.651573 | 0.250000 | 0.500000 | ... | 657.166667 | 4093.916667 | 931.250000 | 11.416667 | 0.500000 | 13.583333 | 97.083333 | 88.333333 | 153.750000 | 363.250000 |
| 2 | Accomack | 51001 | 0.000000 | 3684.888889 | 1533.111111 | 0.000000 | 47.555556 | 0.923669 | 0.111111 | 0.111111 | ... | 704.000000 | 2406.888889 | 1033.666667 | 20.333333 | 4.444444 | 15.000000 | 204.555556 | 316.666667 | 150.222222 | 213.333333 |
| 3 | Ada | 16001 | 0.932203 | 6650.254237 | 2516.016949 | 0.016949 | 164.644068 | 2.747349 | 0.016949 | 0.186441 | ... | 695.728814 | 6005.881356 | 75.271186 | 159.440678 | 14.593220 | 44.237288 | 350.830508 | 472.966102 | 109.000000 | 214.220339 |
| 4 | Adair | 19001 | 0.000000 | 2560.666667 | 1097.333333 | 0.000000 | 50.333333 | 1.737189 | 0.000000 | 0.000000 | ... | 548.000000 | 2519.333333 | 3.666667 | 7.333333 | 0.333333 | 1.333333 | 28.666667 | 33.666667 | 40.666667 | 123.666667 |
5 rows × 146 columns
County GeoJSON can be found here
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
counties = json.load(response)
# https://plotly.com/python/choropleth-maps/
fig = px.choropleth(df, geojson=counties, locations='FIPS', color='lakids10share',
color_continuous_scale="Inferno",
title="<b>USDA 2019:</b> Percent of children living beyond 10 miles from supermarket<br><sup>\
Some US Counties have up to 35% of children who live more than 10 miles away from a supermarket</sup>",
labels={"lakids10share": "% Children"},
scope="usa")
fig.update_traces(marker_line_width=0)
fig.show()
As we see, many US counties, particularly in the western half of the country, lack walkable access to supermarkets
# Population more than 20 miles away from supermarket
fig = px.choropleth(df, geojson=counties, locations='FIPS', color='lasnaphalfshare',
title="<b>Number of Americans Receiving SNAP Benefits: 2019<br><sup>\
With so much dependence on the program, additional funding and outreach could raise political awareness of the benefits and benefactors</sup>",
labels={"TractSNAP": "Number of Citizens"},
scope="usa")
fig.update_traces(marker_line_width=0)
fig.show()
I downloaded budget data from the US White House website. We can use this to see how the breakdown of Federal spending allocates resources to programs like SNAP which work to help feed food insecure individuals in America.
# Original read-in
# budget = pd.read_excel("/Users/andrewbowen/Downloads/hist04z2_fy2024.xlsx", header=1)
budget_path = "https://raw.githubusercontent.com/andrewbowen19/storiesDATA608/main/data/us-budget.csv"
budget = pd.read_csv(budget_path)
budget['department'] = budget['Department or other unit']
# Send to CSV locally
# budget.to_csv("data/us-budget.csv")
# Basic cleaning of budget data
budget = budget.replace('-*', np.nan).replace("..........", np.nan).replace("*", np.nan)
budget = budget.apply(pd.to_numeric, errors='coerce').fillna(budget)
# Convert to floats and only take positive values representing percentage of overall budget
budget['2022'] = budget['2022'].astype(float)
budget = budget.loc[(budget['2022'] >= 0.0) & (budget['department'] != "Total outlays")]
# Highlight Dep of Ag (including SNAP) rows
budget['is_agriculture'] = np.where(budget['department'] == "Department of Agriculture", 'r', 'b')
# Sort by percentage of budget allocated
budget = budget[['department', '2022', 'is_agriculture']].sort_values(by="2022", ascending=True).set_index("department")
# Plotting budget by department
ag_title = "<b>US Government Budget by Department</b><br><sup>The department of Agriculture (which contains the SNAP program) receives only 3.9% of our overall budget.<br>Are we allocating resources adequately to feed americans?</sup>"
fig = px.bar(budget,
x='2022', orientation='h',
title=ag_title,
width=1200, height=800,
labels={
"department": "US Government Department",
"2022": "Percentage of 2022 Federal Budget Overlays"
},
color='is_agriculture', category_orders={'department': budget.index[::-1]}
)
fig.show()
The SNAP program takes up a very small percentage of the overall US Budget. INcreasing the resourcing allocated could be an effective step in the fight against hunger in America.